# Start from an empty workspace: remove every object that ls() reports in the
# current environment (attached packages and options are left untouched).
# NOTE(review): this also wipes the workspace of anyone who source()s this
# script; running it in a fresh R session would make the line unnecessary.
rm(list = ls())

# Packages
library(openxlsx)

# Read the first sheet of the workbook holding the organization keyword counts
# into `dat`. The path is resolved relative to the current working directory.
dat <- openxlsx::read.xlsx(xlsxFile = "vsberichte_metadata.xlsx", sheet = 1)

# Binary variable indicating whether the interior minister is from a
# center-right party:
#   1  -> intmin_party is "cdu", "csu" or "fdp"
#   NA -> intmin_party is "pro" (a missing party also ends up as NA)
#   0  -> any other party
dat$cr_intmin <- ifelse(
  dat$intmin_party %in% c("cdu", "csu", "fdp"),
  1,
  ifelse(dat$intmin_party == "pro", NA, 0)
)

# Remove observations without a center-right or center-left interior minister,
# i.e. every row whose indicator is NA.
dat <- dat[!is.na(dat$cr_intmin), ]

# Logged ratio of organization keywords in the chapters on right-wing and
# left-wing extremism. Adding 0.5 to both counts keeps the logarithm finite
# when a count is zero.
dat$ratio_log_orga <- with(
  dat,
  log(count_orga_kw_right_chapter + 0.5) -
    log(count_orga_kw_left_chapter + 0.5)
)

# Ratio of chapter lengths in words (right-wing over left-wing).
# NOTE(review): a zero word count in the left-wing chapter yields Inf or NaN.
dat$rwe_lwe_words_ratio <- with(
  dat,
  count_words_right_chapter / count_words_left_chapter
)

# Ratio of extremist crimes (right over left); same caveat for a zero
# denominator.
dat$rwe_lwe_crime_ratio <- with(dat, extreme_crime_right / extreme_crime_left)

# Create decade indicator: the first year of the decade that `year` falls in.
# Every year before 1960 is labelled 1950; years from 2030 onwards match none
# of the rules below and keep the initial NA.
dat$decade <- NA
dat$decade[dat$year < 1960] <- 1950
dat$decade[dat$year >= 1960 & dat$year < 1970] <- 1960
dat$decade[dat$year >= 1970 & dat$year < 1980] <- 1970
dat$decade[dat$year >= 1980 & dat$year < 1990] <- 1980
dat$decade[dat$year >= 1990 & dat$year < 2000] <- 1990
dat$decade[dat$year >= 2000 & dat$year < 2010] <- 2000
dat$decade[dat$year >= 2010 & dat$year < 2020] <- 2010
dat$decade[dat$year >= 2020 & dat$year < 2030] <- 2020

# Keep only observations dated strictly after 1950 (1950 itself is excluded).
# The is.na() guard matters: with a bare `dat[dat$year > 1950, ]`, a missing
# year produces an NA row index, and data-frame indexing then returns a row
# made up entirely of NAs instead of dropping the observation.
dat <- dat[!is.na(dat$year) & dat$year > 1950, ]

# Merge in polling data.
polbar <- readstata13::read.dta13("polbar_agg.dta")

# Harmonise the jurisdiction key on both sides before joining.
polbar$jurisdiction <- tolower(polbar$jurisdiction)

# Replace u-umlaut with plain "u". The character is written as the Unicode
# escape \u00fc so the pattern does not depend on the encoding in which this
# script is saved or sourced.
# NOTE(review): only polbar is lower-cased; this presumes dat$jurisdiction is
# already lower-case -- confirm against the workbook.
dat$jurisdiction <- gsub("\u00fc", "u", dat$jurisdiction)

# Left join on (jurisdiction, year): dat$year_pub is matched against
# polbar$year. all.x = TRUE keeps rows of `dat` without a polling match (their
# polbar columns become NA). TRUE is spelled out because `T` is an ordinary
# variable that can be reassigned.
# NOTE(review): merge() sorts the result by the key columns, and duplicate
# (jurisdiction, year) keys in polbar would duplicate rows of `dat` -- confirm
# polbar_agg.dta has one row per key.
dat <- merge(
  dat, polbar,
  by.x = c("jurisdiction", "year_pub"),
  by.y = c("jurisdiction", "year"),
  all.x = TRUE
)

# Regression models: linear models (lm) of the logged right/left organization
# keyword ratio on the center-right indicator interacted with
# polbar_fr_vote_pct. Each model adds controls to the previous specification;
# rows with missing values in any model variable are dropped by lm's default
# NA handling.

# Model 1: interaction only.
mod_1 <- lm(
  ratio_log_orga ~ factor(cr_intmin) * polbar_fr_vote_pct,
  data = dat
)

# Model 2: adds jurisdiction fixed effects.
mod_2 <- lm(
  ratio_log_orga ~ factor(cr_intmin) * polbar_fr_vote_pct +
    factor(jurisdiction),
  data = dat
)

# Model 3: jurisdiction and decade fixed effects.
mod_3 <- lm(
  ratio_log_orga ~ factor(cr_intmin) * polbar_fr_vote_pct +
    factor(jurisdiction) + factor(decade),
  data = dat
)

# Model 4: jurisdiction and year fixed effects.
mod_4 <- lm(
  ratio_log_orga ~ factor(cr_intmin) * polbar_fr_vote_pct +
    factor(jurisdiction) + factor(year),
  data = dat
)

# Model 5: as model 4, additionally controlling for the right/left chapter
# word-count ratio.
mod_5 <- lm(
  ratio_log_orga ~ factor(cr_intmin) * polbar_fr_vote_pct +
    rwe_lwe_words_ratio + factor(jurisdiction) + factor(year),
  data = dat
)

